package main.scala

import java.net.URL
import java.io.FileOutputStream
import java.net.HttpURLConnection
import org.apache.http.impl.client.DefaultHttpClient
import org.apache.http.client.methods.HttpGet
import org.apache.http.HttpStatus
import java.io.InputStream
import java.io.ByteArrayOutputStream
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils
import org.jsoup.Jsoup
import org.jsoup.select.Elements
import org.jsoup.nodes.Element

/**
 * Minimal page fetcher: downloads a URL with Apache HttpClient, parses the
 * response with jsoup and prints the page text and its links to stdout.
 */
class Spider {

  /**
   * Fetches `url`, then prints the text of the document body followed by the
   * absolute target of every `<a href>` element, one per line.
   *
   * Non-fatal failures (network errors, malformed URLs, ...) are reported via
   * `printStackTrace` and do not propagate, so a caller looping over several
   * pages keeps going after a bad one.
   *
   * @param url absolute URL of the page to fetch; also passed to jsoup as the
   *            base URI so that relative links can be resolved
   */
  def get(url: String): Unit = {
    import scala.util.control.NonFatal

    try {
      val client = HttpClients.createDefault()
      try {
        val response = client.execute(new HttpGet(url))
        try {
          // getEntity returns null when the response carries no body.
          Option(response.getEntity) match {
            case Some(entity) =>
              // UTF-8 is only the fallback for responses that do not declare a
              // charset; without it HttpClient decodes such bodies as ISO-8859-1.
              val html = EntityUtils.toString(entity, java.nio.charset.StandardCharsets.UTF_8)
              // The base URI is required for "abs:href" to resolve relative links;
              // without it jsoup yields an empty string for them.
              val doc = Jsoup.parse(html, url)

              // Print all text
              println(doc.body.text())

              // Print all links
              val anchors = doc.select("a[href]").iterator()
              while (anchors.hasNext) {
                val target = anchors.next().attr("abs:href")
                // Still empty when the href cannot be turned into an absolute URL.
                if (target.nonEmpty) println(target)
              }
            case None =>
              Console.err.println(s"No response body for $url")
          }
        } finally {
          // Releases the underlying connection.
          response.close()
        }
      } finally {
        client.close()
      }
    } catch {
      // NonFatal lets errors such as OutOfMemoryError propagate.
      case NonFatal(e) => e.printStackTrace()
    }
  }
}

/** Entry point: fetches ten consecutive pages of a fixed URL using [[Spider]]. */
object Spider extends App {
  // Target URL prefix; the page number is appended to it
  val urlstring = "http://weibo.cn/zhouhongyi?vt=4&page="
  val s = new Spider
  // Crawl pages 0 through 9, pausing 5 seconds after each request
  for (i <- 0 to 9) {
    s.get(urlstring + i)
    Thread.sleep(5000)
  }
}